{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import sys\n",
    "\n",
    "from phoenix.experimental.evals import (\n",
    "    RAG_RELEVANCY_PROMPT_TEMPLATE,\n",
    "    OpenAIModel,\n",
    "    download_benchmark_dataset,\n",
    "    llm_classify,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "logging.basicConfig(level=logging.INFO, stream=sys.stdout)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = download_benchmark_dataset(\n",
    "    task=\"binary-relevance-classification\", dataset_name=\"wiki_qa-train\"\n",
    ").rename(columns={\"query_text\": \"query\", \"document_text\": \"reference\"})\n",
    "df.head()\n",
    "\n",
    "llm_classify(\n",
    "    dataframe=df[:2],\n",
    "    template=RAG_RELEVANCY_PROMPT_TEMPLATE,\n",
    "    # template=\"hello world\",\n",
    "    model=OpenAIModel(),\n",
    "    rails=[\"relevant\", \"irrelevant\"],\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
